package com.spark.statistics

import org.apache.spark.{SparkConf, SparkContext}

/**
  * Created by Administrator on 2017/8/23.
  *
  * Small demonstration of stratified (per-key) sampling on a pair RDD,
  * comparing `sampleByKey` with `sampleByKeyExact` on the same input and
  * the same per-key fractions.
  */
object sample {

  /**
    * Builds a tiny pair RDD of `(x, x * 2)`, draws two stratified samples with
    * replacement and prints every sampled element on the driver.
    *
    * @param args command-line arguments; not used
    */
  def main(args: Array[String]): Unit = {
    // Default to a local master, but let spark-submit / -Dspark.master override it
    // instead of silently forcing "local" on every deployment.
    val conf = new SparkConf()
      .setAppName("statistics")
      .setIfMissing("spark.master", "local")
    val sc = new SparkContext(conf)

    try {
      val pairs = sc
        .parallelize(List(1, 1, 3, 3, 3, 4, 4, 4, 4))
        .map(x => (x, x * 2))

      // Per-key sampling fractions. Key 2 does not occur in the data above;
      // its entry is simply never used.
      val fractions = Map(1 -> 0.9, 2 -> 0.3, 3 -> 0.3, 4 -> 0.3)

      // Sanity check: prints whether every configured fraction is strictly positive.
      println(fractions.values.forall(_ > 0))

      // NOTE(review): per the Spark docs, sampleByKey is the cheaper approximate
      // variant, while sampleByKeyExact makes extra passes over the data to hit the
      // requested per-key sample size much more tightly — confirm for the Spark
      // version in use.
      val approxSample = pairs.sampleByKey(withReplacement = true, fractions)
      val exactSample = pairs.sampleByKeyExact(withReplacement = true, fractions)

      // Collect before printing so the output appears on the driver's stdout even
      // when the job is not running in local mode. The samples here are tiny.
      approxSample.collect().foreach(x => println(x))
      exactSample.collect().foreach(x => println(x))
    } finally {
      // Always release the context, even if one of the jobs above fails.
      sc.stop()
    }
  }

}
